In [1]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from datetime import timedelta
from matplotlib.ticker import MaxNLocator
%matplotlib inline

In [2]:
# plotting parameters
figure_size = (6.25984252, 4.1456693) # = 15.9, 10.53 cm

def rgb(rgb_255_tuple):
    return tuple(v/255 for v in rgb_255_tuple)

colour_op = rgb((13, 103, 53))
colour_s = rgb((254, 205, 8))
colour_p = rgb((237, 29, 36))

sns.set_palette('muted')

Raw data processing

Inflow profile calculation


In [3]:
df = pd.read_csv('CS3 data 1min 09-22_2017-10-11.pdi.gz')

# also make forward filled
time_interval = pd.Timedelta(minutes=1)
df = df.pivot_table(values='Value', columns='TagName', index=(pd.to_datetime(df['Date'].str.cat(df['Time'], sep=' ')) - time_interval))
df.index.name = 'DateTime'

df['Time_30'] = df.index.floor('30min').time

df['EskomDayOfWeek'] = df.index.dayofweek + 1
# replace holidays with Eskom days
h_path = '..\holidays.csv'
holidays = np.genfromtxt(h_path,  dtype=np.dtype('M'), delimiter=',', usecols=0)
holidays = list(holidays)
holidays_numbers = np.genfromtxt(h_path, dtype=int, delimiter=',', usecols=1)
holidays_numbers = list(holidays_numbers)
for h_n, h_d in zip(holidays_numbers, holidays):
    df.loc[df.index.date==h_d,'EskomDayOfWeek'] = h_n
# drop weekends
df = df[df['EskomDayOfWeek'] <= 5]
df.drop('EskomDayOfWeek', axis=1)

df.head(5)


Out[3]:
TagName KDCE_S07_00INS00_00ILT#501.FA_PV KDCE_S07_20PMP00_00ILT#501.FA_PV KDCE_S07_20PMP01_Machine.FA_RF KDCE_S07_20PMP02_Machine.FA_RF KDCE_S07_20PMP03_Machine.FA_RF KDCE_S07_20PMP04_Machine.FA_RF KDCE_S07_22PMP01_Machine.FA_RF KDCE_S07_22PMP02_Machine.FA_RF KDCE_S07_31PMP00_00ILT#501.FA_PV KDCE_S07_31PMP00_01ILT#501.FA_PV ... KDCE_S07_41PMP05_Machine.FA_RF KDCE_S07_IMPMP00_00ILT#501.FA_PV KDCE_S07_IMPMP00_00ILT#502.FA_PV KDCE_S07_IMPMP01_Machine.FA_RF KDCE_S07_IMPMP02_Machine.FA_RF KDCE_S07_IMPMP03_Machine.FA_RF KDCE_S07_IMPMP04_Machine.FA_RF KDCE_S07_IMPMP05_Machine.FA_RF Time_30 EskomDayOfWeek
DateTime
2017-09-22 00:00:00 74.774956 70.753385 0.0 1.0 1.0 0.0 0.0 0.0 53.728382 53.085213 ... 0.0 0.0 72.891219 1.0 0.0 0.0 0.0 1.0 00:00:00 5
2017-09-22 00:01:00 74.818324 70.489669 0.0 1.0 1.0 0.0 0.0 0.0 53.910274 53.256931 ... 0.0 0.0 72.941282 1.0 0.0 0.0 0.0 1.0 00:00:00 5
2017-09-22 00:02:00 75.104757 70.230040 0.0 1.0 1.0 0.0 0.0 0.0 54.085156 53.412698 ... 0.0 0.0 73.050708 1.0 0.0 0.0 0.0 1.0 00:00:00 5
2017-09-22 00:03:00 75.423400 69.950854 0.0 1.0 1.0 0.0 0.0 0.0 54.270035 53.605735 ... 0.0 0.0 73.120918 1.0 0.0 0.0 0.0 1.0 00:00:00 5
2017-09-22 00:04:00 75.682563 69.696494 0.0 1.0 1.0 0.0 0.0 0.0 54.430631 53.766142 ... 0.0 0.0 73.208032 1.0 0.0 0.0 0.0 1.0 00:00:00 5

5 rows × 29 columns


In [4]:
df = df.rename(columns = {'KDCE_S07_00INS00_00ILT#501.FA_PV': 'Surface Level'})

df = df.rename(columns = {'KDCE_S07_41PMP00_00ILT#501.FA_PV': '41L Level'})

df = df.rename(columns = {'KDCE_S07_31PMP00_00ILT#501.FA_PV': '31L Level'})

df = df.rename(columns = {'KDCE_S07_20PMP00_00ILT#501.FA_PV': '20L Level'})
#df.drop([col for col in df.columns if 'KDCE_S07_20PMP' in col], axis=1, inplace=True)

df['IPC Level'] = df[['KDCE_S07_IMPMP00_00ILT#501.FA_PV', 'KDCE_S07_IMPMP00_00ILT#502.FA_PV']].sum(axis=1)
df.drop(['KDCE_S07_IMPMP00_00ILT#501.FA_PV', 'KDCE_S07_IMPMP00_00ILT#502.FA_PV'], axis=1, inplace=True)

In [5]:
status_col_dict = {}
for c in df.columns:
    if '_Machine.FA_RF' in c:
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        level = 'IPC' if level == 'IML' else level
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
        
df = df.rename(columns = status_col_dict)

In [6]:
df.head(5)


Out[6]:
TagName Surface Level 20L Level 20L P1 20L P2 20L P3 20L P4 22L P1 22L P2 31L Level KDCE_S07_31PMP00_01ILT#501.FA_PV ... 41L P4 41L P5 IPC P1 IPC P2 IPC P3 IPC P4 IPC P5 Time_30 EskomDayOfWeek IPC Level
DateTime
2017-09-22 00:00:00 74.774956 70.753385 0.0 1.0 1.0 0.0 0.0 0.0 53.728382 53.085213 ... 1.0 0.0 1.0 0.0 0.0 0.0 1.0 00:00:00 5 72.891219
2017-09-22 00:01:00 74.818324 70.489669 0.0 1.0 1.0 0.0 0.0 0.0 53.910274 53.256931 ... 1.0 0.0 1.0 0.0 0.0 0.0 1.0 00:00:00 5 72.941282
2017-09-22 00:02:00 75.104757 70.230040 0.0 1.0 1.0 0.0 0.0 0.0 54.085156 53.412698 ... 1.0 0.0 1.0 0.0 0.0 0.0 1.0 00:00:00 5 73.050708
2017-09-22 00:03:00 75.423400 69.950854 0.0 1.0 1.0 0.0 0.0 0.0 54.270035 53.605735 ... 1.0 0.0 1.0 0.0 0.0 0.0 1.0 00:00:00 5 73.120918
2017-09-22 00:04:00 75.682563 69.696494 0.0 1.0 1.0 0.0 0.0 0.0 54.430631 53.766142 ... 1.0 0.0 1.0 0.0 0.0 0.0 1.0 00:00:00 5 73.208032

5 rows × 28 columns

Process selected data


In [7]:
def calculate_inflows(level_name, pump_flowrates, capacity, feeding_level_name=None):
    # feeding_level_name is the level feeding this level
    
    number_of_pumps = len(pump_flowrates)

    calc_pump_flows = [np.nan]
    calc_inflows = [np.nan]

    for i in range(1, len(df.index)):
        pump_status = {}
        for j,_ in enumerate(pump_flowrates):
            pump_str = 'P' + str(j+1)
            pump_status[pump_str] = df[level_name + ' ' + pump_str].values[i]
        
        l_new = df[level_name + ' Level'].values[i]
        l_old = df[level_name + ' Level'].values[i-1]
        pump_flow_from_prev_lev = 0 if feeding_level_name is None else df[feeding_level_name + ' PumpFlow'].values[i]
        
        if np.isnan([v for k,v in pump_status.items()] + [l_new] + [l_old] + [pump_flow_from_prev_lev]).any():
            ans_pumpflow = np.nan
            ans_inflow = np.nan

        delta_level = l_new - l_old
        if delta_level != 0:
            ans_outflow_pumps = 0
            for ps, pf in zip(pump_status.items(), pump_flowrates):
                ans_outflow_pumps += ps[1] * pf
                
            ans_inflow = ((l_new - l_old) / 100 * capacity) / 60 + ans_outflow_pumps - pump_flow_from_prev_lev
        else:
            ans_outflow_pumps = np.nan
            ans_inflow = np.nan

        calc_pump_flows.append(ans_outflow_pumps)
        calc_inflows.append(ans_inflow)

    df[level_name + ' PumpFlow'] = calc_pump_flows
    df[level_name + ' Inflow'] = calc_inflows

In [8]:
calculate_inflows('41L', [216.8]*5, 3000000)
calculate_inflows('31L', [146.8]*4, 3000000, '41L')
calculate_inflows('20L', [171.8]*4, 3000000, '31L')
calculate_inflows('IPC', [147.4]*5, 3000000, '20L')

In [9]:
# create dummy column for simulating surface
df['Surface P1'] = [0] * len(df.index)
calculate_inflows('Surface', [0]*1, 5000000, 'IPC')

In [10]:
df.head(5)


Out[10]:
TagName Surface Level 20L Level 20L P1 20L P2 20L P3 20L P4 22L P1 22L P2 31L Level KDCE_S07_31PMP00_01ILT#501.FA_PV ... 41L Inflow 31L PumpFlow 31L Inflow 20L PumpFlow 20L Inflow IPC PumpFlow IPC Inflow Surface P1 Surface PumpFlow Surface Inflow
DateTime
2017-09-22 00:00:00 74.774956 70.753385 0.0 1.0 1.0 0.0 0.0 0.0 53.728382 53.085213 ... NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN
2017-09-22 00:01:00 74.818324 70.489669 0.0 1.0 1.0 0.0 0.0 0.0 53.910274 53.256931 ... 105.100322 146.8 20.945876 343.6 64.942103 294.8 -23.768510 0 0.0 -258.660012
2017-09-22 00:02:00 75.104757 70.230040 0.0 1.0 1.0 0.0 0.0 0.0 54.085156 53.412698 ... 117.960832 146.8 17.441095 343.6 66.985655 294.8 5.912815 0 0.0 -56.105646
2017-09-22 00:03:00 75.423400 69.950854 0.0 1.0 1.0 0.0 0.0 0.0 54.270035 53.605735 ... 121.131652 146.8 22.439658 343.6 57.206746 294.8 -13.695150 0 0.0 -29.264634
2017-09-22 00:04:00 75.682563 69.696494 0.0 1.0 1.0 0.0 0.0 0.0 54.430631 53.766142 ... 107.716550 146.8 10.297707 343.6 69.619970 294.8 -5.242746 0 0.0 -78.830270

5 rows × 39 columns

Export data for use in simulation

Inflow profile


In [11]:
df2 = df.copy()

date_start = '2017-09-22'
date_end = '2017-09-23'

df2 = df2[(df2.index>=date_start) & (df2.index<date_end)]

inflow_profiles = []
overall_day_completeness_count = 0
overall_day_completeness_max = 0
overall_completeness_count = 0
overall_completeness_max = 0
for l in ['41L', '31L', '20L', 'IPC', 'Surface']:
    grouped = df2.set_index('Time_30')[l + ' Inflow'].groupby('Time_30')
    
    grouped_mean = grouped.mean()
    inflow_profiles.append(grouped_mean)
    
    print('{} completeness: {:6.2f}% → {:6.2f}%'.format(l, grouped.count().sum()/48/30*100, grouped_mean.count()/48*100))#grouped.count()/30
    overall_day_completeness_count += grouped.count().sum()
    overall_day_completeness_max += 48*30
    overall_completeness_count += grouped_mean.count()
    overall_completeness_max += 48
print('                   ------   -------')
print('All completenes:  {:6.2f}% → {:6.2f}%'.format(overall_day_completeness_count/overall_day_completeness_max*100, overall_completeness_count/overall_completeness_max*100))
    
pd.DataFrame(inflow_profiles).T.to_csv('..\..\simulations\Case_study_3\input\CS3_dam_inflow_profiles.csv.gz', compression='gzip')


41L completeness:  96.18% → 100.00%
31L completeness:  90.35% → 100.00%
20L completeness:  79.65% → 100.00%
IPC completeness:  84.03% → 100.00%
Surface completeness:  99.58% → 100.00%
                   ------   -------
All completenes:   89.96% → 100.00%

Data for validation of the simulation


In [12]:
df_real = pd.read_csv('CS3 data 1s 09-22_2017-10-11_pivot.csv.gz')
df_real = df_real.set_index('DateTime')
df_real.index = pd.to_datetime(df_real.index)
df_real.head(5)


Out[12]:
KDCE_S07_00INS00_00ILT#501.FA_PV KDCE_S07_20PMP00_00ILT#501.FA_PV KDCE_S07_20PMP01_Machine.FA_RF KDCE_S07_20PMP02_Machine.FA_RF KDCE_S07_20PMP03_Machine.FA_RF KDCE_S07_20PMP04_Machine.FA_RF KDCE_S07_22PMP01_Machine.FA_RF KDCE_S07_22PMP02_Machine.FA_RF KDCE_S07_31PMP00_00ILT#501.FA_PV KDCE_S07_31PMP00_01ILT#501.FA_PV ... KDCE_S07_41PMP03_Machine.FA_RF KDCE_S07_41PMP04_Machine.FA_RF KDCE_S07_41PMP05_Machine.FA_RF KDCE_S07_IMPMP00_00ILT#501.FA_PV KDCE_S07_IMPMP00_00ILT#502.FA_PV KDCE_S07_IMPMP01_Machine.FA_RF KDCE_S07_IMPMP02_Machine.FA_RF KDCE_S07_IMPMP03_Machine.FA_RF KDCE_S07_IMPMP04_Machine.FA_RF KDCE_S07_IMPMP05_Machine.FA_RF
DateTime
2017-09-22 00:00:00 74.710648 70.804398 0.0 1.0 1.0 0.0 0.0 0.0 53.645832 53.067131 ... 0.0 1.0 0.0 0.0 73.032410 1.0 0.0 0.0 0.0 1.0
2017-09-22 00:00:01 74.756136 70.804398 0.0 1.0 1.0 0.0 0.0 0.0 53.645832 53.067131 ... 0.0 1.0 0.0 0.0 73.032410 1.0 0.0 0.0 0.0 1.0
2017-09-22 00:00:02 74.826393 70.804398 0.0 1.0 1.0 0.0 0.0 0.0 53.645832 53.067131 ... 0.0 1.0 0.0 0.0 72.927197 1.0 0.0 0.0 0.0 1.0
2017-09-22 00:00:03 74.806138 70.804398 0.0 1.0 1.0 0.0 0.0 0.0 53.645832 53.067131 ... 0.0 1.0 0.0 0.0 72.916664 1.0 0.0 0.0 0.0 1.0
2017-09-22 00:00:04 74.710648 70.804398 0.0 1.0 1.0 0.0 0.0 0.0 53.645832 53.067131 ... 0.0 1.0 0.0 0.0 72.916664 1.0 0.0 0.0 0.0 1.0

5 rows × 27 columns


In [13]:
df_real['41L Status'] = df_real[[col for col in df_real.columns if ('41PMP' in col and 'Machine.FA_RF' in col)]].sum(skipna=False, axis=1)
df_real = df_real.rename(columns = {'KDCE_S07_41PMP00_00ILT#501.FA_PV': '41L Level'})
df_real.drop([col for col in df_real.columns if 'KDCE_S07_41PMP' in col], axis=1, inplace=True)

df_real['31L Status'] = df_real[[col for col in df_real.columns if ('31PMP' in col and 'Machine.FA_RF' in col)]].sum(skipna=False, axis=1)
df_real['31L Level'] = df_real[['KDCE_S07_31PMP00_00ILT#501.FA_PV', 'KDCE_S07_31PMP00_01ILT#501.FA_PV']].mean(axis=1)
df_real.drop([col for col in df_real.columns if 'KDCE_S07_31PMP' in col], axis=1, inplace=True)

df_real['20L Status'] = df_real[[col for col in df_real.columns if ('20PMP' in col and 'Machine.FA_RF' in col)]].sum(skipna=False, axis=1)
df_real = df_real.rename(columns = {'KDCE_S07_20PMP00_00ILT#501.FA_PV': '20L Level'})
df_real.drop([col for col in df_real.columns if 'KDCE_S07_20PMP' in col], axis=1, inplace=True)
df_real.drop([col for col in df_real.columns if 'KDCE_S07_22PMP' in col], axis=1, inplace=True)

df_real['IPC Status'] = df_real[[col for col in df_real.columns if ('IMPMP' in col and 'Machine.FA_RF' in col)]].sum(skipna=False, axis=1)
df_real['IPC Level'] = df_real[['KDCE_S07_IMPMP00_00ILT#501.FA_PV', 'KDCE_S07_IMPMP00_00ILT#502.FA_PV']].sum(axis=1)
df_real.drop([col for col in df_real.columns if 'KDCE_S07_IMPMP' in col], axis=1, inplace=True)

df_real = df_real.rename(columns = {'KDCE_S07_00INS00_00ILT#501.FA_PV': 'Surface Level'})

df_real.head(5)


Out[13]:
Surface Level 20L Level 41L Level 41L Status 31L Status 31L Level 20L Status IPC Status IPC Level
DateTime
2017-09-22 00:00:00 74.710648 70.804398 61.400463 1.0 1.0 53.356482 2.0 2.0 73.032410
2017-09-22 00:00:01 74.756136 70.804398 61.400463 1.0 1.0 53.356482 2.0 2.0 73.032410
2017-09-22 00:00:02 74.826393 70.804398 61.400463 1.0 1.0 53.356482 2.0 2.0 72.927197
2017-09-22 00:00:03 74.806138 70.804398 61.400463 1.0 1.0 53.356482 2.0 2.0 72.916664
2017-09-22 00:00:04 74.710648 70.804398 61.400463 1.0 1.0 53.356482 2.0 2.0 72.916664

In [14]:
date_range_start = '2017-09-22'
date_range_end = '2017-09-23'
df_real2 = df_real.ix[date_range_start:date_range_end]

df_real2.loc[df_real2['20L Level'] < 48, '20L Level'] = np.nan
df_real2['20L Level'] = df_real2['20L Level'].interpolate('pchip')

level_list = ['41L', '31L', '20L', 'IPC', 'Surface']
level_tag_list = [l + ' Level' for l in level_list]
status_tag_list = [l + ' Status' for l in level_list if l is not 'Surface']
tag_list = level_tag_list + status_tag_list

df_real2[level_tag_list].plot()
df_real2[status_tag_list].plot()
plt.show()
plt.close('all')


C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until

Save simulation testing data


In [15]:
df_real2[tag_list].to_csv('..\..\simulations\Case_study_3\input\CS3_data_for_validation.csv.gz', compression='gzip')

Now perform the simulation in validation mode

If the simulation outputs are reasonably "the same" as the real data, the simulation is "correct"

Run the simulation in validation_mode using initial values for dam level and following pump status instead of scheduler


In [16]:
df_sim = pd.read_csv('..\..\simulations\Case_study_3\output\CS3_simulation_data_export_validation.csv.gz')
df_sim['time_clean'] = pd.to_datetime(df_sim['seconds'], unit='s') + timedelta(days=17059)
df_sim.head(5)


Out[16]:
seconds 41L Level 41L Status 31L Level 31L Status 20L Level 20L Status IPC Level IPC Status Surface Level Surface Status Eskom ToU Pump system total power time_clean
0 0 61.400463 1.0 53.356482 1.0 70.804398 2.0 73.032410 2.0 74.710648 0.0 3 21579.6 2016-09-15 00:00:00
1 1 61.399042 1.0 53.359277 1.0 70.799892 2.0 73.033564 2.0 74.710960 2.0 3 21579.6 2016-09-15 00:00:01
2 2 61.397620 1.0 53.362073 1.0 70.795386 2.0 73.034717 2.0 74.711272 2.0 3 21579.6 2016-09-15 00:00:02
3 3 61.396199 1.0 53.364869 1.0 70.790880 2.0 73.035871 2.0 74.711584 2.0 3 21579.6 2016-09-15 00:00:03
4 4 61.394777 1.0 53.367665 1.0 70.786374 2.0 73.037025 2.0 74.711896 2.0 3 21579.6 2016-09-15 00:00:04

In [17]:
def rmse(real, sim):
    return np.sqrt(((real - sim) ** 2).mean())

def r2(x, y):
    return stats.pearsonr(x,y)[0] ** 2

Comparison of simulation and actual data

41L pump status


In [18]:
level = '41L'

In [19]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Status'].values
y2 = df_sim[level + ' Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [20]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 3])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_status.png', bbox_inches='tight')
plt.close('all')


41L dam level


In [21]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Level'].values
y2 = df_sim[level + ' Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.958117609774
RMSE =  2.80525084967

In [22]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_level.png', bbox_inches='tight')
plt.close('all')


31L pump status


In [23]:
level = '31L'

x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Status'].values
y2 = df_sim[level + ' Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [24]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 3])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_status.png', bbox_inches='tight')
plt.close('all')


31L dam level


In [25]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Level'].values
y2 = df_sim[level + ' Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.969670913439
RMSE =  3.79621625321

In [26]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_level.png', bbox_inches='tight')
plt.close('all')


20L pump status


In [27]:
level = '20L'

x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Status'].values
y2 = df_sim[level + ' Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [28]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 3])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_status.png', bbox_inches='tight')
plt.close('all')


20L dam level


In [29]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Level'].values
y2 = df_sim[level + ' Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.920368970073
RMSE =  4.18265170027

In [30]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_level.png', bbox_inches='tight')
plt.close('all')


IM-level (IPC) pump status


In [31]:
level = 'IPC'

x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Status'].values
y2 = df_sim[level + ' Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [32]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_status.png', bbox_inches='tight')
plt.close('all')


IM-level (IPC) dam level


In [33]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Level'].values
y2 = df_sim[level + ' Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.988562799502
RMSE =  1.75025274911

In [34]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_level.png', bbox_inches='tight')
plt.close('all')


Surface dam level


In [35]:
level = 'Surface'

x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2[level + ' Level'].values
y2 = df_sim[level + ' Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.994550729751
RMSE =  0.813984922897

In [36]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2[level + ' Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim[level + ' Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-09-15 00:00:00','2016-09-15 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-09-15 06:00:00','2016-09-15 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-09-15 07:00:00','2016-09-15 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-09-15 10:00:00','2016-09-15 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 18:00:00','2016-09-15 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-09-15 20:00:00','2016-09-15 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-09-15 22:00:00','2016-09-15 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_validation_' + level + '_level.png', bbox_inches='tight')
plt.close('all')


Processing of x-factored simulation data


In [37]:
def tou_shade(ax, date):
    y_m_d = '{}-{:02}-{:02}'.format(date.year, date.month, date.day)
    ax.axvspan(y_m_d + ' 00:00:00', y_m_d + ' 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
    ax.axvspan(y_m_d + ' 06:00:00', y_m_d + ' 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
    ax.axvspan(y_m_d + ' 07:00:00', y_m_d + ' 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
    ax.axvspan(y_m_d + ' 10:00:00', y_m_d + ' 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 18:00:00', y_m_d + ' 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 20:00:00', y_m_d + ' 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 22:00:00', y_m_d + ' 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

def sim_plot(case_study, factor, mode, level_name, level_limits, x, y, y_lim=100, save_fig=False, chart_type_override=None):
    fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

    if mode != 'power' and mode != 'power_raw':
        for l in level_limits:
            ax1.plot([x[0], x[len(x)-1]], [l, l], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')

        lines1 = ax1.plot(x, df[level_name + ' Level'])

        ax2 = ax1.twinx()
        lines2 = ax2.plot(x, df[level_name + ' Status'], color=sns.color_palette('muted')[2])
        
        tou_shade(ax2, x[0])

        ax1.set_ylabel('Dam level (%)')
        ax1.set_ylim([0, y_lim])
        ax2.set_ylabel('Number of pumps running')
        ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
        
        ax1.set_zorder(ax2.get_zorder()+1)
        ax1.patch.set_visible(False)

        handles, labels = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        del handles[1]
        del labels[1]
        handles += handles2
        labels += labels2
        order = [3, 1, 4, 0, 5, 2]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

        ax1.yaxis.label.set_color(sns.color_palette('muted')[0])
        ax2.yaxis.label.set_color(sns.color_palette('muted')[2])
    
    else:
        lbl = 'Power (resampled, step plot)' if mode == 'power' else 'Power (raw data, 1 second)'
        ax1.plot(x, y, drawstyle='steps-post', label=lbl, zorder=3)
        tou_shade(ax1, x[0])
        ax1.set_ylabel('Power (kW)')
        if y_lim is not None:
            ax1.set_ylim([0, y_lim])
        
        handles, labels = ax1.get_legend_handles_labels()
        order = [1, 0, 2, 3]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
    
    ax1.set_xlabel('Time of day')
    if mode != 'power' and mode != 'power_raw':
        ax1.yaxis.set_ticks(np.arange(0, 101, 10))
    ax1.xaxis.set_major_locator(mdates.HourLocator())
    ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    fig.autofmt_xdate(rotation=90)
    ax1.set_xlim((min(x), max(x)))

    ax1.grid(which='major', alpha=0.5)
    
    fig.tight_layout()
    
    if save_fig:
        if mode == 'power_raw':
            chart_type = 'power_raw'
        elif mode == 'power':
            chart_type = 'power_resampled'
        else:
            chart_type = 'dam_level_and_status'
                
        filename = 'output/CS{}_{}_{}_{}.png'.format(case_study, level_name, factor, chart_type)
        fig.savefig(filename, bbox_inches='tight')
    
    return fig

1-factor


In [38]:
df = pd.read_csv('..\..\simulations\Case_study_3\output\CS3_simulation_data_export_1-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df = df.rename(columns = {'IPC Level': 'IM Level'})
df = df.rename(columns = {'IPC Status': 'IM Status'})
df.head(5)


Out[38]:
seconds 41L Level 41L Status 31L Level 31L Status 20L Level 20L Status IM Level IM Status Surface Level Surface Status Eskom ToU Pump system total power time_clean
0 0 61.400463 1.0 53.356482 1.0 70.804398 2.0 73.032410 2.0 74.710648 0 3 21579.6 1970-01-01 00:00:00
1 1 61.399042 1.0 53.359277 1.0 70.799892 2.0 73.033564 2.0 74.710960 0 3 21579.6 1970-01-01 00:00:01
2 2 61.397620 1.0 53.362073 1.0 70.795386 2.0 73.034717 2.0 74.711272 0 3 21579.6 1970-01-01 00:00:02
3 3 61.396199 1.0 53.364869 1.0 70.790880 2.0 73.035871 2.0 74.711584 0 3 21579.6 1970-01-01 00:00:03
4 4 61.394777 1.0 53.367665 1.0 70.786374 2.0 73.037025 2.0 74.711896 0 3 21579.6 1970-01-01 00:00:04

In [39]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


41L Min: 44.998161689165%  Max: 78.67035210521945%
31L Min: 39.67187029864268%  Max: 78.16208020312223%
20L Min: 41.99847499747795%  Max: 92.20856555448265%
IM Min: 39.99494455444503%  Max: 88.62892075539554%
Surface Min: 60.688402728128914%  Max: 104.52999244534278%

Plot dam level and status


In [40]:
fig = sim_plot(3, 1, 'level', '41L', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [41]:
fig = sim_plot(3, 1, 'level', '31L', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [42]:
fig = sim_plot(3, 1, 'level', '20L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [43]:
fig = sim_plot(3, 1, 'level', 'IM', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [44]:
level_name = 'Surface'

x = df['time_clean']

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

ax1.plot([x[0], x[len(x)-1]], [40, 40], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
ax1.plot([x[0], x[len(x)-1]], [100, 100], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
lines1 = ax1.plot(x, df[level_name + ' Level'])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)


ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])
ax1.yaxis.set_ticks(np.arange(0, 101, 10))
ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x), max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
del handles[1]
del labels[1]
order = [2, 1, 3, 0, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_' + level_name + '_1_level_and_status.png', bbox_inches='tight')
plt.close('all')


Plot power

Raw data


In [45]:
fig = sim_plot(3, 1, 'power_raw', None, None, df['time_clean'], df['Pump system total power'], y_lim=None, save_fig=True)


Resampled data


In [46]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS3_resampled_power_1_factor.csv')

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
#fig.patch.set_facecolor('grey')

x = resampled.index
y = resampled['total_30_minute']

ax1.plot(x,y, 'x', label="Power (data points resampled to 30 minutes)", zorder=4)
ax1.plot(x,y, '--', label="Power (resampled, line plot)", zorder=2)
ax1.plot(x,y, drawstyle='steps-post', label="Power (resampled, step plot)", zorder=3)
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Power consumption (kW)')
#ax1.set_ylim([0, 4000])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)

plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_1_power_resampled_all.png', bbox_inches='tight')
plt.close('all')



In [47]:
fig = sim_plot(3, 1, 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


2-factor


In [48]:
df = pd.read_csv('..\..\simulations\Case_study_3\output\CS3_simulation_data_export_2-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df = df.rename(columns = {'IPC Level': 'IM Level'})
df = df.rename(columns = {'IPC Status': 'IM Status'})
df.head(5)


Out[48]:
seconds 41L Level 41L Status 31L Level 31L Status 20L Level 20L Status IM Level IM Status Surface Level Surface Status Eskom ToU Pump system total power time_clean
0 0 61.400463 1.0 53.356482 1.0 70.804398 2.0 73.032410 2.0 74.710648 0 3 21579.6 1970-01-01 00:00:00
1 1 61.399042 1.0 53.359277 1.0 70.799892 2.0 73.033564 2.0 74.710960 0 3 21579.6 1970-01-01 00:00:01
2 2 61.397620 1.0 53.362073 1.0 70.795386 2.0 73.034717 2.0 74.711272 0 3 21579.6 1970-01-01 00:00:02
3 3 61.396199 1.0 53.364869 1.0 70.790880 2.0 73.035871 2.0 74.711584 0 3 21579.6 1970-01-01 00:00:03
4 4 61.394777 1.0 53.367665 1.0 70.786374 2.0 73.037025 2.0 74.711896 0 3 21579.6 1970-01-01 00:00:04

In [49]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


41L Min: 44.998161689165%  Max: 78.67035210521945%
31L Min: 39.67187029864268%  Max: 78.16208020312223%
20L Min: 41.99847499747795%  Max: 92.20856555448265%
IM Min: 46.131910281981796%  Max: 88.62892075539554%
Surface Min: 60.688402728130576%  Max: 100.0015361164498%

Plot dam level and status


In [50]:
fig = sim_plot(3, 2, 'level', '41L', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [51]:
fig = sim_plot(3, 2, 'level', '31L', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [52]:
fig = sim_plot(3, 2, 'level', '20L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [53]:
fig = sim_plot(3, 2, 'level', 'IM', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [54]:
level_name = 'Surface'

x = df['time_clean']

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

ax1.plot([x[0], x[len(x)-1]], [40, 40], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
ax1.plot([x[0], x[len(x)-1]], [100, 100], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
lines1 = ax1.plot(x, df[level_name + ' Level'])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)


ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])
ax1.yaxis.set_ticks(np.arange(0, 101, 10))
ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x), max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
del handles[1]
del labels[1]
order = [2, 1, 3, 0, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_' + level_name + '_2_level_and_status.png', bbox_inches='tight')
plt.close('all')


Plot power


In [55]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS3_resampled_power_2_factor.csv')

fig = sim_plot(3, 2, 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


n-factor


In [56]:
df = pd.read_csv('..\..\simulations\Case_study_3\output\CS3_simulation_data_export_n-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df = df.rename(columns = {'IPC Level': 'IM Level'})
df = df.rename(columns = {'IPC Status': 'IM Status'})
df.head(5)


Out[56]:
seconds 41L Level 41L Status 31L Level 31L Status 20L Level 20L Status IM Level IM Status Surface Level Surface Status Eskom ToU Pump system total power time_clean
0 0 61.400463 1.0 53.356482 1.0 70.804398 2.0 73.032410 2.0 74.710648 0 3 21579.6 1970-01-01 00:00:00
1 1 61.399042 1.0 53.359277 1.0 70.799892 2.0 73.028650 3.0 74.713908 0 3 25152.4 1970-01-01 00:00:01
2 2 61.397620 1.0 53.362073 1.0 70.795386 2.0 73.024891 3.0 74.717168 0 3 25152.4 1970-01-01 00:00:02
3 3 61.396199 1.0 53.364869 1.0 70.790880 2.0 73.021131 3.0 74.720428 0 3 25152.4 1970-01-01 00:00:03
4 4 61.394777 1.0 53.367665 1.0 70.786374 2.0 73.017372 3.0 74.723688 0 3 25152.4 1970-01-01 00:00:04

In [57]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


41L Min: 29.999213655367356%  Max: 80.0018048576522%
31L Min: 31.23459987546811%  Max: 95.37957308599094%
20L Min: 44.61135467994481%  Max: 80.16787860596584%
IM Min: 33.624084387558476%  Max: 83.7665595347359%
Surface Min: 61.520964603277655%  Max: 120.8578457226513%

Plot dam level and status


In [58]:
fig = sim_plot(3, 'n', 'level', '41L', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [59]:
fig = sim_plot(3, 'n', 'level', '31L', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [60]:
fig = sim_plot(3, 'n', 'level', '20L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [61]:
fig = sim_plot(3, 'n', 'level', 'IM', [25, 95], df['time_clean'], None, y_lim=100, save_fig=True)



In [62]:
level_name = 'Surface'

x = df['time_clean']

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

ax1.plot([x[0], x[len(x)-1]], [40, 40], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
ax1.plot([x[0], x[len(x)-1]], [100, 100], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')
lines1 = ax1.plot(x, df[level_name + ' Level'])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)


ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])
ax1.yaxis.set_ticks(np.arange(0, 101, 10))
ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x), max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
del handles[1]
del labels[1]
order = [2, 1, 3, 0, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS3_' + level_name + '_n_level_and_status.png', bbox_inches='tight')
plt.close('all')


Plot power


In [63]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS3_resampled_power_n_factor.csv')

fig = sim_plot(3, 'n', 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=33372.632066665639, save_fig=True)


Scoring graph


In [64]:
scores = pd.read_csv('scoring_results.csv').set_index('Score')
scores


Out[64]:
1-factor 2-factor n-factor
Score
Feature 16.666667 20.370370 33.333333
Performance 25.586832 30.940972 52.171198

In [65]:
sns.set()

score_sim = (scores['1-factor']['Performance'], scores['2-factor']['Performance'], scores['n-factor']['Performance'])
score_feat = (scores['1-factor']['Feature'], scores['2-factor']['Feature'], scores['n-factor']['Feature'])
ind = np.arange(len(score_sim))    # the x locations for the groups
width = 0.5       # the width of the bars: can also be len(x) sequence

plt.figure(figsize=figure_size)

p1 = plt.bar(ind, score_sim, width)
p2 = plt.bar(ind, score_feat, width, bottom=score_sim)


plt.ylabel('Scores')
plt.xlabel('Control system')
    #'x-factored simulation')
plt.xticks(ind, ('1-factor', '2-factor', r'$x$-factor'))
#plt.yticks(np.arange(0, 101, 10))
plt.gca().yaxis.set_major_locator(plt.NullLocator())
#plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), loc='best')
plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

for r1, r2 in zip(p1, p2):
    h1 = r1.get_height()
    h2 = r2.get_height()
    plt.text(r1.get_x() + r1.get_width() / 2., h1 / 2., "%.2f" % h1, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 / 2., "%.2f" % h2, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 + 1.5, "%.2f" % (h1 + h2),
         ha="center", va="center", color='black', weight='bold')


plt.grid(False)
    
plt.tight_layout()
plt.savefig('output/CS3_final_scores.png', bbox_inches='tight', figsize=figure_size, dpi=1200)